{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "<b>Use Google's multimodal model Gemini to classify images</b>\n",
        "\n",
        "Upload your favorite image and get Gemini to describe the image."
      ],
      "metadata": {
        "id": "ZWhWniBGu3_Y"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Configure Gemini API key\n",
        "\n",
        "#Access your Gemini API key\n",
        "\n",
        "import google.generativeai as genai\n",
        "from google.colab import userdata\n",
        "\n",
        "gemini_api_secret_name = 'GOOGLE_API_KEY'  # @param {type: \"string\"}\n",
        "\n",
        "try:\n",
        "  GOOGLE_API_KEY=userdata.get(gemini_api_secret_name)\n",
        "  genai.configure(api_key=GOOGLE_API_KEY)\n",
        "except userdata.SecretNotFoundError as e:\n",
        "   print(f'Secret not found\\n\\nThis expects you to create a secret named {gemini_api_secret_name} in Colab\\n\\nVisit https://makersuite.google.com/app/apikey to create an API key\\n\\nStore that in the secrets section on the left side of the notebook (key icon)\\n\\nName the secret {gemini_api_secret_name}')\n",
        "   raise e\n",
        "except userdata.NotebookAccessError as e:\n",
        "  print(f'You need to grant this notebook access to the {gemini_api_secret_name} secret in order for the notebook to access Gemini on your behalf.')\n",
        "  raise e\n",
        "except Exception as e:\n",
        "  # unknown error\n",
        "  print(f\"There was an unknown error. Ensure you have a secret {gemini_api_secret_name} stored in Colab and it's a valid key from https://makersuite.google.com/app/apikey\")\n",
        "  raise e\n",
        "\n",
        "model = genai.GenerativeModel('gemini-pro-vision')"
      ],
      "metadata": {
        "id": "yFv1abRcv2P2",
        "cellView": "form"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Demo image analysis\n",
        "\n",
        "import PIL.Image, base64, io\n",
        "\n",
        "from IPython.display import HTML, display\n",
        "\n",
        "!curl -o image.jpg https://t0.gstatic.com/licensed-image?q=tbn:ANd9GcQ_Kevbk21QBRy-PgB4kQpS79brbmmEG7m3VOTShAn4PecDU5H5UxrJxE3Dw1JiaG17V88QIol19-3TM2wCHw\n",
        "\n",
        "img = PIL.Image.open('image.jpg')\n",
        "response = model.generate_content(img)\n",
        "\n",
        "# convert the image to Base64 for embedding in HTML\n",
        "buffered = io.BytesIO()\n",
        "img.save(buffered, format=\"JPEG\")\n",
        "img_str = base64.b64encode(buffered.getvalue()).decode()\n",
        "\n",
        "# construct a table to render the view\n",
        "table_html = f\"\"\"\n",
        "<table>\n",
        "<tr>\n",
        "    <td><img src='data:image/jpeg;base64,{img_str}' style='width:500px; height:auto;'></td>\n",
        "</tr>\n",
        "<tr>\n",
        "    <td style='vertical-align: bottom;'>{response.text}</td>\n",
        "</tr>\n",
        "</table>\n",
        "\"\"\"\n",
        "\n",
        "# display the table\n",
        "display(HTML(table_html))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 434
        },
        "cellView": "form",
        "id": "5xo4rMgzqhSA",
        "outputId": "9faab2a6-5c88-40fd-e53a-ffbef886fb91"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
            "\r  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0\r100  405k  100  405k    0     0  2277k      0 --:--:-- --:--:-- --:--:-- 2288k\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "<table>\n",
              "<tr>\n",
              "    <td><img src='' style='width:500px; height:auto;'></td>\n",
              "</tr>\n",
              "<tr>\n",
              "    <td style='vertical-align: bottom;'> These are meal prep containers with chicken, brown rice, broccoli, and peppers.</td>\n",
              "</tr>\n",
              "</table>\n"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Upload your own image\n",
        "\n",
        "from google.colab import files\n",
        "uploaded = files.upload()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 73
        },
        "cellView": "form",
        "id": "PWkD1iUwuTel",
        "outputId": "52d110fd-4281-405e-9a1b-801ea44abc90"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "     <input type=\"file\" id=\"files-f305f247-c86f-4b0a-b08f-4dcfbdc8b451\" name=\"files[]\" multiple disabled\n",
              "        style=\"border:none\" />\n",
              "     <output id=\"result-f305f247-c86f-4b0a-b08f-4dcfbdc8b451\">\n",
              "      Upload widget is only available when the cell has been executed in the\n",
              "      current browser session. Please rerun this cell to enable.\n",
              "      </output>\n",
              "      <script>// Copyright 2017 Google LLC\n",
              "//\n",
              "// Licensed under the Apache License, Version 2.0 (the \"License\");\n",
              "// you may not use this file except in compliance with the License.\n",
              "// You may obtain a copy of the License at\n",
              "//\n",
              "//      http://www.apache.org/licenses/LICENSE-2.0\n",
              "//\n",
              "// Unless required by applicable law or agreed to in writing, software\n",
              "// distributed under the License is distributed on an \"AS IS\" BASIS,\n",
              "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
              "// See the License for the specific language governing permissions and\n",
              "// limitations under the License.\n",
              "\n",
              "/**\n",
              " * @fileoverview Helpers for google.colab Python module.\n",
              " */\n",
              "(function(scope) {\n",
              "function span(text, styleAttributes = {}) {\n",
              "  const element = document.createElement('span');\n",
              "  element.textContent = text;\n",
              "  for (const key of Object.keys(styleAttributes)) {\n",
              "    element.style[key] = styleAttributes[key];\n",
              "  }\n",
              "  return element;\n",
              "}\n",
              "\n",
              "// Max number of bytes which will be uploaded at a time.\n",
              "const MAX_PAYLOAD_SIZE = 100 * 1024;\n",
              "\n",
              "function _uploadFiles(inputId, outputId) {\n",
              "  const steps = uploadFilesStep(inputId, outputId);\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  // Cache steps on the outputElement to make it available for the next call\n",
              "  // to uploadFilesContinue from Python.\n",
              "  outputElement.steps = steps;\n",
              "\n",
              "  return _uploadFilesContinue(outputId);\n",
              "}\n",
              "\n",
              "// This is roughly an async generator (not supported in the browser yet),\n",
              "// where there are multiple asynchronous steps and the Python side is going\n",
              "// to poll for completion of each step.\n",
              "// This uses a Promise to block the python side on completion of each step,\n",
              "// then passes the result of the previous step as the input to the next step.\n",
              "function _uploadFilesContinue(outputId) {\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  const steps = outputElement.steps;\n",
              "\n",
              "  const next = steps.next(outputElement.lastPromiseValue);\n",
              "  return Promise.resolve(next.value.promise).then((value) => {\n",
              "    // Cache the last promise value to make it available to the next\n",
              "    // step of the generator.\n",
              "    outputElement.lastPromiseValue = value;\n",
              "    return next.value.response;\n",
              "  });\n",
              "}\n",
              "\n",
              "/**\n",
              " * Generator function which is called between each async step of the upload\n",
              " * process.\n",
              " * @param {string} inputId Element ID of the input file picker element.\n",
              " * @param {string} outputId Element ID of the output display.\n",
              " * @return {!Iterable<!Object>} Iterable of next steps.\n",
              " */\n",
              "function* uploadFilesStep(inputId, outputId) {\n",
              "  const inputElement = document.getElementById(inputId);\n",
              "  inputElement.disabled = false;\n",
              "\n",
              "  const outputElement = document.getElementById(outputId);\n",
              "  outputElement.innerHTML = '';\n",
              "\n",
              "  const pickedPromise = new Promise((resolve) => {\n",
              "    inputElement.addEventListener('change', (e) => {\n",
              "      resolve(e.target.files);\n",
              "    });\n",
              "  });\n",
              "\n",
              "  const cancel = document.createElement('button');\n",
              "  inputElement.parentElement.appendChild(cancel);\n",
              "  cancel.textContent = 'Cancel upload';\n",
              "  const cancelPromise = new Promise((resolve) => {\n",
              "    cancel.onclick = () => {\n",
              "      resolve(null);\n",
              "    };\n",
              "  });\n",
              "\n",
              "  // Wait for the user to pick the files.\n",
              "  const files = yield {\n",
              "    promise: Promise.race([pickedPromise, cancelPromise]),\n",
              "    response: {\n",
              "      action: 'starting',\n",
              "    }\n",
              "  };\n",
              "\n",
              "  cancel.remove();\n",
              "\n",
              "  // Disable the input element since further picks are not allowed.\n",
              "  inputElement.disabled = true;\n",
              "\n",
              "  if (!files) {\n",
              "    return {\n",
              "      response: {\n",
              "        action: 'complete',\n",
              "      }\n",
              "    };\n",
              "  }\n",
              "\n",
              "  for (const file of files) {\n",
              "    const li = document.createElement('li');\n",
              "    li.append(span(file.name, {fontWeight: 'bold'}));\n",
              "    li.append(span(\n",
              "        `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n",
              "        `last modified: ${\n",
              "            file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n",
              "                                    'n/a'} - `));\n",
              "    const percent = span('0% done');\n",
              "    li.appendChild(percent);\n",
              "\n",
              "    outputElement.appendChild(li);\n",
              "\n",
              "    const fileDataPromise = new Promise((resolve) => {\n",
              "      const reader = new FileReader();\n",
              "      reader.onload = (e) => {\n",
              "        resolve(e.target.result);\n",
              "      };\n",
              "      reader.readAsArrayBuffer(file);\n",
              "    });\n",
              "    // Wait for the data to be ready.\n",
              "    let fileData = yield {\n",
              "      promise: fileDataPromise,\n",
              "      response: {\n",
              "        action: 'continue',\n",
              "      }\n",
              "    };\n",
              "\n",
              "    // Use a chunked sending to avoid message size limits. See b/62115660.\n",
              "    let position = 0;\n",
              "    do {\n",
              "      const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n",
              "      const chunk = new Uint8Array(fileData, position, length);\n",
              "      position += length;\n",
              "\n",
              "      const base64 = btoa(String.fromCharCode.apply(null, chunk));\n",
              "      yield {\n",
              "        response: {\n",
              "          action: 'append',\n",
              "          file: file.name,\n",
              "          data: base64,\n",
              "        },\n",
              "      };\n",
              "\n",
              "      let percentDone = fileData.byteLength === 0 ?\n",
              "          100 :\n",
              "          Math.round((position / fileData.byteLength) * 100);\n",
              "      percent.textContent = `${percentDone}% done`;\n",
              "\n",
              "    } while (position < fileData.byteLength);\n",
              "  }\n",
              "\n",
              "  // All done.\n",
              "  yield {\n",
              "    response: {\n",
              "      action: 'complete',\n",
              "    }\n",
              "  };\n",
              "}\n",
              "\n",
              "scope.google = scope.google || {};\n",
              "scope.google.colab = scope.google.colab || {};\n",
              "scope.google.colab._files = {\n",
              "  _uploadFiles,\n",
              "  _uploadFilesContinue,\n",
              "};\n",
              "})(self);\n",
              "</script> "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Saving cookies.jpeg to cookies.jpeg\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Ask Gemini about your image\n",
        "\n",
        "img = PIL.Image.open(list(uploaded.keys())[0])\n",
        "response = model.generate_content(img)\n",
        "\n",
        "# convert the image to Base64 for embedding in HTML\n",
        "buffered = io.BytesIO()\n",
        "img.save(buffered, format=\"JPEG\")\n",
        "img_str = base64.b64encode(buffered.getvalue()).decode()\n",
        "\n",
        "# construct a table\n",
        "table_html = f\"\"\"\n",
        "<table>\n",
        "<tr>\n",
        "    <td><img src='data:image/jpeg;base64,{img_str}' style='width:500px; height:auto;'></td>\n",
        "</tr>\n",
        "<tr>\n",
        "    <td style='vertical-align: bottom;'>{response.text}</td>\n",
        "</tr>\n",
        "</table>\n",
        "\"\"\"\n",
        "\n",
        "# display the table\n",
        "display(HTML(table_html))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 567
        },
        "cellView": "form",
        "id": "SEd1Wru0vDaX",
        "outputId": "568a7d66-584d-4ae2-c6b8-ce6b10c9d4e4"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "<table>\n",
              "<tr>\n",
              "    <td><img src='' style='width:500px; height:auto;'></td>\n",
              "</tr>\n",
              "<tr>\n",
              "    <td style='vertical-align: bottom;'> These look delicious! They look like chocolate chip cookies with pastel-colored candy pieces.</td>\n",
              "</tr>\n",
              "</table>\n"
            ]
          },
          "metadata": {}
        }
      ]
    }
  ]
}